suppressPackageStartupMessages({
library(epiwraps)
library(rtracklayer)
library(GenomicRanges)
library(ggplot2)
library(ComplexHeatmap)
library(chromVAR)
library(motifmatchr)
library(memes)
library(MotifDb)
library(universalmotif)
library(TFBSTools)
library(AnnotationHub)
library(RColorBrewer)
library(viridis)
library(viridisLite)
library(ggrepel)
library(magick)
library(ggpubr)
library(cowplot)
})
## Possible Ensembl SSL connectivity problems detected.
## Please see the 'Connection Troubleshooting' section of the biomaRt vignette
## vignette('accessing_ensembl', package = 'biomaRt')Error in curl::curl_fetch_memory(url, handle = handle) :
## Peer certificate cannot be authenticated with given CA certificates: [uswest.ensembl.org] SSL certificate problem: certificate has expired
## See system.file("LICENSE", package="MotifDb") for use restrictions.
theme_set(theme_bw())
set.seed(123)
load signal files load Veh track for NF,Mono and Full; and give names
bw_ATAC_mono <- list.files(path = "../Dex_Veh/tracks/", pattern ="*Veh.(.*)mono.bw",full.names = TRUE )
bw_ATAC_nf <- list.files(path = "../Dex_Veh/tracks/", pattern ="*Veh.(.*)NF.bw",full.names = TRUE )
bw_ATAC_full <- list.files(path = "../Dex_Veh/tracks/", pattern ="*Veh.(.*)full.bw",full.names = TRUE )
names(bw_ATAC_mono) <- c("ATAC_Mono_1","ATAC_Mono_2")
names(bw_ATAC_nf) <- c("ATAC_NF_1","ATAC_NF_2")
names(bw_ATAC_full) <- c("ATAC_Full_1","ATAC_Full_2")
import 5fC sites and getting unique 5fC and no5fC sites
sites_5fC <- readRDS("../object_resources/5fC/sites_5fC.rds")
sites_no5fC <- readRDS("../object_resources/5fC/sites_no5fC.rds")
load TSS with and without 5fC
TSS_2KB_5fC <- readRDS("../object_resources/5fC/TSS_2KB_5fC.rds")
TSS_2KB_no5fC <- readRDS("../object_resources/5fC/TSS_2KB_no5fC.rds")
generation of normalization factors
bg_nf <- bwNormFactors(c(bw_ATAC_nf,bw_ATAC_mono,bw_ATAC_full), method = "background")
## Warning: bwNormFactors is deprecated, please gradually switch to
## `getNormFactors`.
## Comparing coverage in random regions...
sample no5fC regions to match
sampled_no5fC <- TSS_2KB_no5fC[sample(c(1:length(TSS_2KB_no5fC)),size = length(TSS_2KB_5fC))]
list_5fC_no5fC <- unlist(GRangesList(TSS_2KB_5fC,sampled_no5fC))
cluster regions 5fC top and no5fC bottom
# cluster regions 5fC top and no5fC bottom
i <- integer(length(list_5fC_no5fC))
i[1:(length(list_5fC_no5fC)/2)] <- "TSS with 5fC"
i[((length(list_5fC_no5fC)/2)+1):length(list_5fC_no5fC)] <- "TSS with no 5fC"
#saveRDS(i,"../object_resources/figure_4_meta/cluster_5fC_no5fC.LIST.rds")
mycolors <- c("TSS with 5fC"=brewer.pal(n=9,"Set1")[1], "TSS with no 5fC"=brewer.pal(n=9,"Set1")[2])
m_5fC_no5fC <- signal2Matrix(c(bw_ATAC_nf,bw_ATAC_mono,bw_ATAC_full),w = 2L, regions = list_5fC_no5fC,type = "center")
## Reading ../Dex_Veh/tracks//Veh4.NF.bw
## Reading ../Dex_Veh/tracks//Veh5.NF.bw
## Reading ../Dex_Veh/tracks//Veh4.mono.bw
## Reading ../Dex_Veh/tracks//Veh5.mono.bw
## Reading ../Dex_Veh/tracks//Veh4.full.bw
## Reading ../Dex_Veh/tracks//Veh5.full.bw
m_5fC_no5fC_bg <- rescaleSignalMatrices(m_5fC_no5fC,bg_nf)
#merging replicates in the signal matrices
sm2 <- list( Nuc.Free =mergeSignalMatrices(m_5fC_no5fC_bg[1:2],aggregation = "mean"), Mononucleosome=mergeSignalMatrices(m_5fC_no5fC_bg[3:4],aggregation = "mean"), Full=mergeSignalMatrices(m_5fC_no5fC_bg[5:6],aggregation = "mean") )
saveRDS(sm2,"../object_resources/figure_4_meta/matrix_mm.rds")
m_mm <- readRDS("../object_resources/figure_4_meta/matrix_mm.rds")
mycolors <- c("TSS with 5fC"=brewer.pal(n=9,"Set1")[1], "TSS with no 5fC"=brewer.pal(n=9,"Set1")[2])
i <- readRDS("../object_resources/figure_1_meta/cluster_5fC_no5fC.LIST.rds")
p1 <- epiwraps::plotEnrichedHeatmaps((m_mm), row_split = i,scale_title = "Backg.\nnorm.\ndensity", axis_name = c("-2kb","TSS","+2kb"), colors = rocket(100), mean_color = mycolors, mean_scale_side = "left", top_annotation = FALSE,use_raster = TRUE)
p1
#saveRDS(p1,"../object_resources/figure_4_meta/p1.rds")
#p1 <- readRDS("../object_resources/figure_4_meta/p1.rds")
generate individual matrices for 5fC and no5fC
# with 5fC
m_5fC <- signal2Matrix(c(bw_ATAC_nf,bw_ATAC_mono,bw_ATAC_full),w = 10L, regions = TSS_2KB_5fC)
## Reading ../Dex_Veh/tracks//Veh4.NF.bw
## Reading ../Dex_Veh/tracks//Veh5.NF.bw
## Reading ../Dex_Veh/tracks//Veh4.mono.bw
## Reading ../Dex_Veh/tracks//Veh5.mono.bw
## Reading ../Dex_Veh/tracks//Veh4.full.bw
## Reading ../Dex_Veh/tracks//Veh5.full.bw
m_5fC_bg <- rescaleSignalMatrices(m_5fC,bg_nf)
# without 5fC
m_no5fC <- signal2Matrix(c(bw_ATAC_nf,bw_ATAC_mono,bw_ATAC_full),w = 10L, regions = TSS_2KB_no5fC)
## Reading ../Dex_Veh/tracks//Veh4.NF.bw
## Reading ../Dex_Veh/tracks//Veh5.NF.bw
## Reading ../Dex_Veh/tracks//Veh4.mono.bw
## Reading ../Dex_Veh/tracks//Veh5.mono.bw
## Reading ../Dex_Veh/tracks//Veh4.full.bw
## Reading ../Dex_Veh/tracks//Veh5.full.bw
m_no5fC_bg <- rescaleSignalMatrices(m_no5fC,bg_nf)
aggregate signal
# with the matrices we generate dataframes with mean signal per position
d_5fC <- meltSignals(m_5fC_bg,trim = c(0.02,0.98))
d_no5fC <- meltSignals(m_no5fC_bg,trim = c(0.02,0.98))
# we merge both dataframes
dmerge <- dplyr::bind_rows(list("5fC"=d_5fC, "no5fC"=d_no5fC), .id="regions")
dmerge
#saveRDS(dmerge,"../object_resources/figure_4_meta/dmerge_1.rds")
# we load the merged dataframe
#dmerge <- readRDS("../object_resources/figure_4_meta/dmerge_1.rds")
# we plot the aggregate signals by region and fragment size distribution
p2 <- plot_grid(
ggplot(dmerge[dmerge$sample == "ATAC_NF_1"|dmerge$sample == "ATAC_NF_2",], aes(position, mean, colour=regions, group = regions, fill = regions)) +
geom_vline(xintercept=0, linetype="dashed") +
labs(x="TSS", y="mean background norm. density")+
stat_summary(geom="ribbon", alpha=0.3, colour = NA ) + stat_summary(fun=mean, geom="line", size = 0.6)+
ggtitle("Nuc. Free")+ scale_color_brewer(palette="Set1")+scale_fill_brewer(palette="Set1")+ylim(0,0.1)+theme(text = element_text(size = 10),legend.position = "none"),
ggplot(dmerge[dmerge$sample == "ATAC_Mono_1"|dmerge$sample == "ATAC_Mono_2",], aes(position, mean, colour=regions, group = regions, fill = regions)) +
geom_vline(xintercept=0, linetype="dashed") +
stat_summary(geom="ribbon", alpha=0.3, colour = NA ) + stat_summary(fun=mean, geom="line", size = 0.6)+
labs(x="TSS")+
ggtitle("Mononucleosome")+ scale_color_brewer(palette="Set1")+scale_fill_brewer(palette="Set1")+ylim(0,0.1)+theme(text = element_text(size = 10),axis.title.y = element_blank(),legend.position = "none",axis.text.y = element_blank()),
ggplot(dmerge[dmerge$sample == "ATAC_Full_1"|dmerge$sample == "ATAC_Full_2",], aes(position, mean, colour=regions, group = regions, fill = regions)) +
geom_vline(xintercept=0, linetype="dashed") +
stat_summary(geom="ribbon", alpha=0.3, colour = NA ) + stat_summary(fun=mean, geom="line", size = 0.6)+
xlab("TSS")+
ggtitle("Full")+ scale_color_brewer(palette="Set1")+scale_fill_brewer(palette="Set1")+ylim(0,0.1)+theme(text = element_text(size = 10),axis.title.y = element_blank(),axis.text.y = element_blank()),
ncol = 3, nrow = 1,rel_widths = c(1.15,1,1.2)
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
p2
#saveRDS(p2,"../object_resources/figure_4_meta/p2.rds")
#p2 <- readRDS("../object_resources/figure_4_meta/p2.rds")
previous data was generated with methylation data from an unpublished source therefore it is now repeated with published work (rando)
import unique 5fC sites in proximity to TSS
unique_5fC_sites_in_TSS <- readRDS("../object_resources/5fC/unique_5fC_sites_in_TSS.rds")
import genome fasta file (GRCm38)
genome <- Rsamtools::FaFile("../object_resources/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa")
genome
## class: FaFile
## path: ../object_resources/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa
## index: ../object_resources.../Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.fai
## gzindex: ../object_resourc.../Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.gzi
## isOpen: FALSE
## yieldSize: NA
loading Rando data methylation levels at promoters and perform liftover (PL)
methylation_mm9 <- readRDS("../object_resources/GSE75323_sperm_prom_methylation.mm9.rds")
methylation_mm9 <- GRanges(methylation_mm9)
seqlevelsStyle(methylation_mm9)
## [1] "UCSC"
mm9_mm10_chain <- import.chain("../object_resources//mm9ToMm10.over.chain")
methylation_total <- liftOver(methylation_mm9,chain = mm9_mm10_chain)
methylation_total <- unlist(methylation_total)
seqlevelsStyle(methylation_total) <- "Ensembl"
saveRDS(methylation_total,"../object_resources/methylation_total_grcm38.GR.rds")
load GRCm38 sperm methylation data and check for promoters with methylation score >0.5
#methylation_total <- readRDS("../object_resources/methylation_rando_grcm38.GR.rds")
methyl_promoters <- methylation_total[methylation_total$meanMeth>0.5]
load motifs and check select non-redundant TFs
core <- read_meme(file = "../object_resources/HOCOMOCOv11_core_MOUSE_mono_meme_format.meme") # needs to be the right path
#full <- read_meme(file = "HOCOMOCOv11_full_MOUSE_mono_meme_format.meme")
# original function PL
summary_core <- universalmotif::summarise_motifs(core)
core <- setNames(object = core,nm = summary_core$name)
motifs <- core
modf <- data.frame(row.names=summary_core$name,
TF=gsub("_MOUSE.H11MO...+","",summary_core$name),
grade=gsub(".+\\.","",summary_core$name),
motif_rank = gsub("[^01]+","",gsub(".*MO.",replacement = "",summary_core$name))) #gsub("[^ro]+", "", str1)
modf <- modf[order(modf$TF,modf$grade,modf$motif_rank),]
modf <- modf[!duplicated(modf$TF),]
modf <- modf[!modf$grade == "D",]
motifs <- setNames(universalmotif::convert_motifs(motifs[row.names(modf)]), modf$TF)
peak_centers <- resize(unique_5fC_sites_in_TSS,fix = "center",width = 200)
peak_seqs <- memes::get_sequence(dropSeqlevels(peak_centers,"MT",pruning.mode = "coarse"), genome)# dropping MT due to its absence in reference genome file
peak_centers <- reduce(promoters(methyl_promoters, upstream = 1000, downstream = 100))
peak_seqs_methyl_prom <- memes::get_sequence(dropSeqlevels(peak_centers,"MT",pruning.mode = "coarse"), genome)# dropping MT due to its absence in reference genome file
ame <- memes::runAme(peak_seqs,control = peak_seqs_methyl_prom , database=convert_motifs(motifs,class = "universalmotif-universalmotif"), meme_path="/common/meme/bin/")
head(ame)
ame$motif <- basename(ame$motif_db)
#saveRDS(ame,"../object_resources/figure_4_meta/motif_enrichment_5fC_5mC_promoter.rds")
load sample
#ame <- readRDS("../object_resources/figure_4_meta/motif_enrichment_5fC_5mC_promoter.rds")
ame <- ame[!ame$fp==0,] #TFs with no false positive are removed since no fold change can be calculated
# results with no false positive were removed
p3 <- ggplot(ame, aes(log2(((tp/pos))/((fp/neg))), -log10(adj.pvalue))) +
geom_point(alpha=0.7, size =2.5) + geom_text_repel(data=head((ame),10), aes(label=motif),max.overlaps = 20, size = 4,nudge_y = 0.5) +
labs(x="log2FE")+ xlim(c(0,6.5))+theme(text = element_text(size = 12))#+ ggtitle("5fC sites in TSS proximity compared to methylated promoter regions")
p3.2 <- p3 + geom_hline(yintercept=-log10(0.05), linetype="dashed")+
geom_text(aes(5.5,-log10(0.05),label = "adjst.p-value = 0.05", vjust = -1.0),size = 3.0)
p3.2
#plot_grid(grid.grabExpr(ComplexHeatmap::draw(p1)), nrow=1, rel_widths=c(5,3))
pp <- plot_grid(plot_grid(grid.grabExpr(ComplexHeatmap::draw(p1)),p3.2,labels = c("A","C"),rel_widths = c(3,2)),p2,labels = c(NA,"B"),nrow = 2,rel_heights = c(3,2))
pp
## Warning: Removed 1 rows containing missing values (`geom_text()`).
pp
## Warning: Removed 1 rows containing missing values (`geom_text()`).
pdf("figure_4_meta.pdf", width=10, height=7.5)
pp # no figure head
## Warning: Removed 1 rows containing missing values (`geom_text()`).
dev.off()
## png
## 2
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] cowplot_1.1.1 ggpubr_0.4.0 magick_2.8.1
## [4] ggrepel_0.9.3 viridis_0.6.2 viridisLite_0.4.2
## [7] RColorBrewer_1.1-3 AnnotationHub_2.22.1 BiocFileCache_1.14.0
## [10] dbplyr_2.1.1 TFBSTools_1.28.0 universalmotif_1.8.5
## [13] MotifDb_1.32.0 Biostrings_2.58.0 XVector_0.30.0
## [16] memes_0.99.11 motifmatchr_1.12.0 chromVAR_1.12.0
## [19] ggplot2_3.4.2 rtracklayer_1.50.0 epiwraps_0.99.72
## [22] EnrichedHeatmap_1.29.2 ComplexHeatmap_2.15.1 GenomicRanges_1.42.0
## [25] GenomeInfoDb_1.26.7 IRanges_2.24.1 S4Vectors_0.28.1
## [28] BiocGenerics_0.36.1
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 R.methodsS3_1.8.2
## [3] tidyr_1.2.0 bit64_4.0.5
## [5] knitr_1.45 DelayedArray_0.16.3
## [7] R.utils_2.11.0 data.table_1.14.8
## [9] rpart_4.1.21 KEGGREST_1.30.1
## [11] RCurl_1.98-1.13 AnnotationFilter_1.14.0
## [13] doParallel_1.0.17 generics_0.1.3
## [15] GenomicFeatures_1.42.3 RSQLite_2.2.11
## [17] bit_4.0.5 tzdb_0.3.0
## [19] xml2_1.3.5 httpuv_1.6.9
## [21] SummarizedExperiment_1.20.0 assertthat_0.2.1
## [23] DirichletMultinomial_1.32.0 xfun_0.41
## [25] hms_1.1.1 jquerylib_0.1.4
## [27] evaluate_0.23 promises_1.2.0.1
## [29] fansi_1.0.5 progress_1.2.2
## [31] caTools_1.18.2 DBI_1.1.3
## [33] htmlwidgets_1.5.4 purrr_1.0.1
## [35] ellipsis_0.3.2 dplyr_1.1.1
## [37] backports_1.4.1 annotate_1.68.0
## [39] biomaRt_2.46.3 MatrixGenerics_1.2.1
## [41] vctrs_0.6.1 Biobase_2.50.0
## [43] ensembldb_2.14.1 Cairo_1.6-0
## [45] abind_1.4-5 cachem_1.0.7
## [47] withr_2.5.2 Gviz_1.34.1
## [49] BSgenome_1.58.0 vroom_1.5.7
## [51] checkmate_2.3.0 GenomicAlignments_1.26.0
## [53] prettyunits_1.2.0 cluster_2.1.4
## [55] lazyeval_0.2.2 seqLogo_1.56.0
## [57] crayon_1.5.2 edgeR_3.32.1
## [59] pkgconfig_2.0.3 labeling_0.4.3
## [61] pkgload_1.3.2 ProtGenerics_1.22.0
## [63] nnet_7.3-19 rlang_1.1.2
## [65] lifecycle_1.0.4 miniUI_0.1.1.1
## [67] dichromat_2.0-0.1 rprojroot_2.0.4
## [69] matrixStats_1.1.0 Matrix_1.6-3
## [71] ggseqlogo_0.1 carData_3.0-5
## [73] base64enc_0.1-3 processx_3.8.2
## [75] GlobalOptions_0.1.2 png_0.1-8
## [77] rjson_0.2.21 bitops_1.0-7
## [79] splitstackshape_1.4.8 cmdfun_1.0.2
## [81] R.oo_1.25.0 blob_1.2.2
## [83] shape_1.4.6 stringr_1.5.0
## [85] readr_2.1.2 jpeg_0.1-10
## [87] rstatix_0.7.0 ggsignif_0.6.4
## [89] CNEr_1.26.0 scales_1.2.1
## [91] memoise_2.0.1 magrittr_2.0.3
## [93] plyr_1.8.9 zlibbioc_1.36.0
## [95] compiler_4.0.3 clue_0.3-65
## [97] Rsamtools_2.6.0 cli_3.6.1
## [99] ps_1.7.5 pbapply_1.7-2
## [101] htmlTable_2.4.0 Formula_1.2-5
## [103] MASS_7.3-60 tidyselect_1.2.0
## [105] stringi_1.8.1 highr_0.10
## [107] yaml_2.3.7 askpass_1.2.0
## [109] locfit_1.5-9.4 latticeExtra_0.6-29
## [111] sass_0.4.1 VariantAnnotation_1.36.0
## [113] tools_4.0.3 circlize_0.4.15
## [115] rstudioapi_0.15.0 TFMPvalue_0.0.8
## [117] foreach_1.5.2 foreign_0.8-84
## [119] gridExtra_2.3 farver_2.1.1
## [121] digest_0.6.33 BiocManager_1.30.20
## [123] shiny_1.7.1 pracma_2.4.4
## [125] Rcpp_1.0.11 car_3.0-12
## [127] broom_0.7.12 BiocVersion_3.12.0
## [129] later_1.3.1 httr_1.4.2
## [131] AnnotationDbi_1.52.0 biovizBase_1.38.0
## [133] Rdpack_2.6 colorspace_2.1-0
## [135] brio_1.1.3 XML_3.99-0.15
## [137] splines_4.0.3 plotly_4.10.0
## [139] xtable_1.8-4 jsonlite_1.8.7
## [141] poweRlaw_0.70.6 GenomicFiles_1.26.0
## [143] UpSetR_1.4.0 testthat_3.1.2
## [145] R6_2.5.1 Hmisc_4.6-0
## [147] pillar_1.9.0 htmltools_0.5.7
## [149] mime_0.12 glue_1.6.2
## [151] fastmap_1.1.1 DT_0.22
## [153] BiocParallel_1.24.1 interactiveDisplayBase_1.28.0
## [155] codetools_0.2-19 utf8_1.2.4
## [157] lattice_0.21-8 bslib_0.3.1
## [159] tibble_3.2.1 curl_5.0.0
## [161] gtools_3.9.5 GO.db_3.12.1
## [163] openssl_2.0.0 survival_3.3-1
## [165] limma_3.46.0 rmarkdown_2.13
## [167] desc_1.4.2 munsell_0.5.0
## [169] GetoptLong_1.0.5 GenomeInfoDbData_1.2.4
## [171] iterators_1.0.14 reshape2_1.4.4
## [173] gtable_0.3.3 rbibutils_2.2.16